Exercise 2.1

# Open the help page describing the aus_production dataset
help(aus_production)

# Keep only the time index and the one measured series needed from each
# dataset. Each name is reassigned, so from here on these names refer to the
# reduced versions.
aus_production <- select(aus_production, Quarter, Bricks)
pelt <- select(pelt, Year, Lynx)
gafa_stock <- select(gafa_stock, Date, Close)
vic_elec <- select(vic_elec, Time, Demand)

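Looking at each of our datasets, the time interval of each series is: Bricks (aus_production) is quarterly, Lynx (pelt) is annual, Close (gafa_stock) is daily on trading days only, and Demand (vic_elec) is half-hourly.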

# Draw a time plot of each selected series with `autoplot`
aus_production |> autoplot(Bricks)
## Warning: Removed 20 rows containing missing values (`geom_line()`).

pelt |> autoplot(Lynx)

gafa_stock |> autoplot(Close)

vic_elec |> autoplot(Demand)

# Same Victorian electricity plot, now with descriptive axis labels
vic_elec |>
  autoplot(Demand) +
  labs(x = "Half-Hours", y = "Electricity Demand")

Exercise 2.2

To find the peak (max) closing price for each stock, we’ll need to first group our data then filter by our value (in this case Close):

# Within each stock symbol, keep only the row(s) whose closing price equals
# that symbol's maximum closing price
gafa_stock |>
  group_by(Symbol) |>
  filter(Close == max(Close))
## # A tsibble: 4 x 3 [!]
## # Key:       Symbol [4]
## # Groups:    Symbol [4]
##   Date       Close Symbol
##   <date>     <dbl> <chr> 
## 1 2018-10-03  232. AAPL  
## 2 2018-09-04 2040. AMZN  
## 3 2018-07-25  218. FB    
## 4 2018-07-26 1268. GOOG

Exercise 2.3

# Load the tute1 CSV (path is relative to the working directory)
tute1 <- readr::read_csv(file = "../data/tute1.csv")
## Rows: 100 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (3): Sales, AdBudget, GDP
## date (1): Quarter
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Open the raw data in the spreadsheet-style viewer, but only in an
# interactive session: View() is a GUI helper and may fail or do nothing
# useful when the document is rendered non-interactively.
if (interactive()) {
  View(tute1)
}

# Convert the Quarter column to a quarterly index and build a tsibble on it
mytimeseries <- tute1 |>
  mutate(Quarter = yearquarter(Quarter)) |>
  as_tsibble(index = Quarter)

# Reshape to long form (one row per Quarter and series name) and draw one
# panel per series, each with its own y-axis scale
mytimeseries |>
  pivot_longer(-Quarter) |>
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(name ~ ., scales = "free_y")

Now let’s remove facet_grid

# Same line plot as before, but with every series drawn in a single panel.
# The facet_grid(name ~ ., scales = "free_y") layer is deliberately left out.
ggplot(
  data = pivot_longer(mytimeseries, -Quarter),
  mapping = aes(x = Quarter, y = value, colour = name)
) +
  geom_line()

The plots are included in the same panel and share the y-axis! When the series are on different scales, the shared axis compresses the smaller ones, so their variation looks flatter than it really is.

Exercise 2.4

library(USgas)
# Turn the USgas totals into a tsibble: one yearly series per state
us_total <- us_total |>
  as_tsibble(index = year, key = state)

# Plot consumption for New England
# State names must match the data exactly (no stray whitespace), otherwise a
# state is silently dropped from the result. %in% keeps the list in one place
# instead of a chain of `|` comparisons.
new_england_states <- c(
  "Maine",
  "Vermont",
  "New Hampshire",
  "Massachusetts",
  "Connecticut",
  "Rhode Island"
)

new_england <- us_total |>
  filter(state %in% new_england_states)

autoplot(new_england, y) +
  labs(x = "Year", y = "Annual Gas Consumption (millions of cubic feet)")

Exercise 2.5

# Read the Excel sheet, convert Quarter from a date to a year-quarter value
# (1998-01-01 => 1998 Q1), and build a tsibble keyed by Region, State and
# Purpose
tourism_xl <- "../data/tourism.xlsx" |>
  readxl::read_excel() |>
  mutate(Quarter = yearquarter(Quarter)) |>
  as_tsibble(key = c(Region, State, Purpose), index = Quarter)

Finding which combination of Region and Purpose had the maximum number of overnight trips on average.

# Average overnight trips for every Region/Purpose combination, keeping only
# the combination with the highest average.
#   - as_tibble() drops the tsibble structure first; on a tsibble,
#     summarise() keeps the Quarter index and would return one value per
#     quarter instead of one per combination.
#   - summarise() (not mutate()) collapses each group to a single row.
#   - ungroup() before filter() so the means are compared with each other;
#     filtering while still grouped compares each group's mean only with
#     itself and therefore keeps every row.
mean_trips <- tourism_xl %>%
  as_tibble() %>%
  group_by(Region, Purpose) %>%
  summarise(trips = mean(Trips)) %>%
  ungroup() %>%
  filter(trips == max(trips))

mean_trips
## # A tsibble: 24,320 x 6 [1Q]
## # Key:       Region, State, Purpose [304]
## # Groups:    Region, Purpose [304]
##    Quarter Region   State           Purpose  Trips trips
##      <qtr> <chr>    <chr>           <chr>    <dbl> <dbl>
##  1 1998 Q1 Adelaide South Australia Business  135.  156.
##  2 1998 Q2 Adelaide South Australia Business  110.  156.
##  3 1998 Q3 Adelaide South Australia Business  166.  156.
##  4 1998 Q4 Adelaide South Australia Business  127.  156.
##  5 1999 Q1 Adelaide South Australia Business  137.  156.
##  6 1999 Q2 Adelaide South Australia Business  200.  156.
##  7 1999 Q3 Adelaide South Australia Business  169.  156.
##  8 1999 Q4 Adelaide South Australia Business  134.  156.
##  9 2000 Q1 Adelaide South Australia Business  154.  156.
## 10 2000 Q2 Adelaide South Australia Business  169.  156.
## # ℹ 24,310 more rows

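Note that the output above still has 24,320 rows — every quarter of every Region/Purpose combination — so Business in Adelaide is simply the first combination listed, not necessarily the one with the highest number of trips on average. To identify the maximum, each combination must first be collapsed to a single mean, and the means then compared with each other.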

Now we can get the total trips by state using similar group_by functionality:

# Getting total trips by state using the group_by function.
# Uses tourism_xl (the tsibble built from the Excel file in this exercise)
# rather than the packaged `tourism` dataset, so the result reflects the data
# that was actually loaded. Because the input is a tsibble, summarise() keeps
# the Quarter index: the result is one total per State per Quarter.
total_trips <- tourism_xl %>%
  group_by(State) %>%
  summarise(sum(Trips))
total_trips
## # A tsibble: 640 x 3 [1Q]
## # Key:       State [8]
##    State Quarter `sum(Trips)`
##    <chr>   <qtr>        <dbl>
##  1 ACT   1998 Q1         551.
##  2 ACT   1998 Q2         416.
##  3 ACT   1998 Q3         436.
##  4 ACT   1998 Q4         450.
##  5 ACT   1999 Q1         379.
##  6 ACT   1999 Q2         558.
##  7 ACT   1999 Q3         449.
##  8 ACT   1999 Q4         595.
##  9 ACT   2000 Q1         600.
## 10 ACT   2000 Q2         557.
## # ℹ 630 more rows

Exercise 2.8

# Total private US employment, reduced to the Month index and Employed series
us_emp <- fpp3::us_employment |>
  filter(Title == "Total Private") |>
  select(Month, Employed)

# Time plot, seasonal plot, lag plot and subseries plot of the same series
us_emp |> autoplot(Employed)

us_emp |> gg_season(Employed)

us_emp |> gg_lag(Employed)

us_emp |> gg_subseries(Employed)

# Pick the index and the series of interest from each remaining dataset.
# pelt is read via tsibbledata::pelt, presumably because the earlier `pelt`
# assignment kept only the Lynx column.
pelt <- select(tsibbledata::pelt, Year, Hare)
us_gas <- select(us_gasoline, Week, Barrels)
pbs <- select(filter(PBS, ATC2 == "H02"), Month, Cost)

Plotting Pelt data first

pelt |> autoplot(Hare)

# gg_season(pelt, Hare) is skipped: this is yearly data, so no seasonality
pelt |> gg_lag(Hare)

pelt |> gg_subseries(Hare)

Now we can plot from our aus_production dataset

# Time, seasonal, lag and subseries plots of the Bricks series
aus_production |> autoplot(Bricks)
## Warning: Removed 20 rows containing missing values (`geom_line()`).

aus_production |> gg_season(Bricks)
## Warning: Removed 20 rows containing missing values (`geom_line()`).

aus_production |> gg_lag(Bricks)
## Warning: Removed 20 rows containing missing values (gg_lag).

aus_production |> gg_subseries(Bricks)
## Warning: Removed 5 rows containing missing values (`geom_line()`).

This data is a bit less granular (quarterly, instead of monthly). However, seasonal cycles can still be observed within a given year. There was an outlier year in the early 80s, likely due to a larger economic issue.

Now we can plot our PBS Cost data. We definitely see seasonality within these time series, as well as a general increase over a longer time scale. Feb - May seems to be a down period for safety net payments as well.

pbs |> autoplot(Cost)

pbs |> gg_season(Cost)

# gg_lag(pbs, Cost) is skipped: more than one series present
pbs |> gg_subseries(Cost)

Finally, we can plot out the data on US gasoline supplied. Again, we see seasonal effects present in this data. One thing about this time series is that the variance of the seasonal shifts is pretty small. In other words, the amount by which production swings due to seasonality is pretty consistent over time. Also, no larger outlier years jump out at us visually.

# Time, seasonal, lag and subseries plots of weekly US gasoline barrels
us_gas |> autoplot(Barrels)

us_gas |> gg_season(Barrels)

us_gas |> gg_lag(Barrels)

us_gas |> gg_subseries(Barrels)